
# set working directory
setwd("~/Research/Mycorrhizae/Fieldwork/Isotopes")

library(lme4)
library(nlme)
library(lmerTest)
library(olsrr)
library(MuMIn)
library(htmlTable)
library(ggplot2)
library(Rmisc)
library(SciViews)
library(gridExtra)
library(MASS)
library(regclass)

# define predictive R^2 functions for later
PRESS <- function(linear.model) {
  #' calculate the predictive residuals
  pr <- residuals(linear.model)/(1-lm.influence(linear.model)$hat)
  #' calculate the PRESS
  PRESS <- sum(pr^2)
  
  return(PRESS)
}

pred_r_squared <- function(linear.model) {
  #' Use anova() to get the sum of squares for the linear model
  lm.anova <- anova(linear.model)
  #' Calculate the total sum of squares
  tss <- sum(lm.anova$'Sum Sq')
  # Calculate the predictive R^2
  pred.r.squared <- 1-PRESS(linear.model)/(tss)
  
  return(pred.r.squared)
}

#########################################
# CREATING GRAPHS OF RAW DATA AND MEANS
################################################

min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# remove bog sample
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# choose lab
min.soil.data <- min.soil.data[ which(min.soil.data$Lab == "WSU"),]

# put distance 5 into 6 bin for analysis
for (i in 1:length(min.soil.data$Distance)){
  if (min.soil.data$Distance[i]==5){
    min.soil.data$Distance[i] <- 6
  }
}

# group Grey soil under Mineral
for (i in 1:length(min.soil.data$Distance)){
  if (min.soil.data$Type[i]=="Grey"){
    min.soil.data$Type[i] <- "Mineral"
  }
}

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Hansen"),]
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Happy"),]
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Yako"),]

# by hansen side
min.soil.data <- min.soil.data[ which(min.soil.data$Side=="SL"),]
min.soil.data <- min.soil.data[ which(min.soil.data$Side=="SR"),]

# mineral or organic
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Organic"),]
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Mineral"),]

# select only 1-20 m
min.soil.data <- min.soil.data[ which(min.soil.data$Distance < 7),]

# choose transect
min.soil.data <- min.soil.data[ which(min.soil.data$Transect=="S6"),]

# examine C:N by Hansen bank organic 
tgc <- summarySE(min.soil.data, measurevar="dC13", groupvars=c("Side"))
tgc
ggplot(tgc, aes(x=Distance, y=d15N, color=Type)) + geom_point(position = position_dodge(.9), 
  stat = "summary", fun = "mean") + geom_line() +
  geom_errorbar(aes(ymin=d15N-se, ymax=d15N+se), width=.2,position=position_dodge(.9)) +
  theme_classic() + 
  geom_point(data=min.soil.data[which(min.soil.data$Side=="SL"),], aes(x=Distance, y=d15N), 
  alpha=0.2) + scale_x_reverse() 

# just plotting one variable 
tgc <- summarySE(min.soil.data, measurevar="d15N", groupvars=c("Distance"))
tgc
p <- ggplot(tgc, aes(x=Distance, y=d15N)) + geom_point(position = position_dodge(.9), 
  stat = "summary", fun = "mean") + geom_line() +
  geom_errorbar(aes(ymin=d15N-se, ymax=d15N+se), width=.2,position=position_dodge(.9)) +
  theme_classic() + 
  #scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) +
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) +
  geom_point(data=min.soil.data[which(min.soil.data$Side=="SR"),], aes(x=Distance, y=d15N), 
             alpha=0.2) + ylim(4,11) 
p
ggsave(plot=p, width=5, height=3, dpi=300, filename = "d15N_20_minsoil_right_raw.jpg")

# mean and errors
p <- ggplot(tgc, aes(x=Side, y=dC13)) + 
  geom_errorbar(aes(ymin=dC13-se, ymax=dC13+se), width=.1) + geom_line() + 
  geom_point() + theme_bw() + scale_x_discrete(labels=c('Enhanced', 'Depleted')) + 
  ylab("Mineral soil dC13") 
p
ggsave(plot=p, width=3, height=3, dpi=300, filename = "dC13_Hansen_minsoil_byside.jpg")


# for specific distances use #scale_x_continuous(breaks = c(1,3,6,10,20,40,60,100))
# for d15N at Hansen, use ylim(4,11)
# for total N at Hansen, use ylim(0.006, 0.06)
# for soil pH at Hansen, use ylim(3.5,6)
# for percent C at Hansen, use ylim(1.9, 48.88)
# for percent N at Hansen, use ylim(0.15, 2.65)

#######################################################################
# CREATE MODEL PREDICTIONS WITH RAW DATA
# Soil C:N, d15N and dC13 by distance and bank at Hansen
#####################################################################
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
min.soil.data <- min.soil.data[ which(min.soil.data$Lab == "WSU"),]

# remove bog sample
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Hansen"),]

# by organic or mineral soil
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Mineral"),]

# select only 1-20 m
min.soil.data <- min.soil.data[ which(min.soil.data$Distance < 21),]

# choose transect
min.soil.data <- min.soil.data[ which(min.soil.data$Transect=="S6"),]

# is distance normally distributed

# center Distance variable
#min.soil.data$Distance <- scale(min.soil.data$Distance, scale=FALSE)

# create ln of Distance column
min.soil.data$lnDistance <- ln(min.soil.data$Distance)

# create quadratic lnDistance column
min.soil.data$lnDistance2 <- min.soil.data$lnDistance^2
min.soil.data$Distance2 <- (min.soil.data$Distance)^2

##############################################
# C:N #
#########################################
# select data
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
min.soil.data <- min.soil.data[ which(min.soil.data$Lab == "WSU"),]

# remove bog sample
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Hansen"),]

# by organic or mineral soil
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Mineral"),]

# select only 1-20 m
min.soil.data <- min.soil.data[ which(min.soil.data$Distance < 21),]

# using just distance doesn't work

test1 <- scale(min.soil.data$GWC, scale=FALSE)
min.soil.data$GWC <- test1[,1]

min.soil.data$decomposing.carcass <- as.factor(min.soil.data$decomposing.carcass)
min.soil.data$carcass.deposition <- as.factor(min.soil.data$carcass.deposition)
min.soil.data$carcass.load <- as.numeric(min.soil.data$carcass.load)

# examine effect of distance on d15N ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(log(C.N) ~ 
                Side + 
                ln(Distance) + 
                Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                GWC + 
                carcass.load + 
                #d15N +
                carcass.deposition + 
                decomposing.carcass,
              data=min.soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(d15N ~ ln(Distance) + GWC + decomposing.carcass + 
                        (1|Stream:Transect), data=min.soil.data)
summary(dredge.model)

dredge.model <- lmer(d15N ~ decomposing.carcass + GWC + ln(Distance) + (1|Stream:Transect), data=min.soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(d15N ~ ln(Distance) + GWC + carcass.deposition + decomposing.carcass + 
                                  carcass.load + (1|Stream:Transect), data=min.soil.data))

dredge.model <- lm(d15N ~ decomposing.carcass + GWC + ln(Distance), data=min.soil.data)
summary(dredge.model)

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# for 1-20 mineral soil C:N
dredge.model <- lm(C.N ~ carcass.deposition + decomposing.carcass + ln(Distance) + GWC, data=min.soil.data)
summary(dredge.model)

# for 1-100 mineral soil C:N
dredge.model <- lm(C.N ~ decomposing.carcass + GWC +ln(Distance), data=min.soil.data)
summary(dredge.model)

# for 1-100
dredge.model <- lm(C.N ~ GWC + ln(Distance), data=min.soil.data)
summary(dredge.model)

# for 1-20
dredge.model <- lm(C.N ~ GWC + Side + Side:I(ln(Distance)^2), data=min.soil.data)
summary(dredge.model)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(step.model)

mod1 <- lm(C.N ~ Side, data=min.soil.data)
mod2 <- lm(C.N ~ Side + GWC, data=min.soil.data)
mod3 <- lm(C.N ~ ln(Distance) + GWC, data=min.soil.data)
mod4 <- lm(C.N ~ ln(Distance), data=min.soil.data)
mod5 <- lm(C.N ~ ln(Distance) + Side, data=min.soil.data)
mod6 <- lm(C.N ~ ln(Distance) + Side + GWC, data=min.soil.data)
mod7 <- lm(C.N ~ GWC, data=min.soil.data)
#mod8a <- lm(C.N ~ Side + lnDistance + Side:lnDistance + Side:I(lnDistance^2) + GWC, data=min.soil.data)
#mod8b <- lm(C.N ~ Side + lnDistance + Side*lnDistance + Side*I(lnDistance^2) + GWC, data=min.soil.data)
mod8c <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=min.soil.data)
#mod8d <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + Side*I((ln(Distance))^2) + GWC, data=min.soil.data)
#mod8e <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + Side*lnDistance2 + GWC, data=min.soil.data)
#mod8f <- lm(C.N ~ Side + ln(Distance) + Side*ln(Distance) + lnDistance2 + Side*lnDistance2 + GWC, data=min.soil.data)
mod9 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance), data=min.soil.data)
mod10 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=min.soil.data)
mod11 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2), data=min.soil.data)
mod12 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=min.soil.data)
mod13 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=min.soil.data)
mod13a <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3) + GWC, data=min.soil.data)
mod13b <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3) + GWC, data=min.soil.data)
mod13c <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=min.soil.data)
mod14 <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=min.soil.data)
mod14a <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=min.soil.data)
mod14b <- lm(C.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + GWC, data=min.soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod15a <- lm(C.N ~ Side:I(ln(Distance)^2) + GWC, data=min.soil.data)
mod15 <- lm(C.N ~ Side:I(ln(Distance)^3) + GWC, data=min.soil.data)
mod16 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE) + GWC, data=min.soil.data)
mod16a <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE) + GWC, data=min.soil.data)
mod16b <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE) + GWC, data=min.soil.data)
mod16c <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE) + GWC, data=min.soil.data)
mod16d <- lm(C.N ~ Side:poly(ln(Distance), 7, raw=TRUE) + GWC, data=min.soil.data)
mod17 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE), data=min.soil.data)
mod18 <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE), data=min.soil.data)
mod19 <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE), data=min.soil.data)
mod20 <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE), data=min.soil.data)
mod21 <- lm(C.N ~ Side:poly(ln(Distance), 3, raw=TRUE) + GWC, data=min.soil.data)
mod22 <- lm(C.N ~ Side:poly(ln(Distance), 4, raw=TRUE) + GWC, data=min.soil.data)
mod23 <- lm(C.N ~ Side:poly(ln(Distance), 5, raw=TRUE) + GWC, data=min.soil.data)
mod24 <- lm(C.N ~ Side:poly(ln(Distance), 6, raw=TRUE) + GWC, data=min.soil.data)

results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8c, mod9, mod10, mod11, mod12,
               mod13, mod13a, mod13b, mod13c, mod14, mod14a, mod14b, 
               #mod16, mod16a, mod16b, mod16c, mod16d, mod17, mod18, mod19, mod20, mod21, mod22, 
               #mod23, mod24, 
               step.model, dredge.model)
ordered <- results[order(-results$AIC),]
ordered
summary(mod10)

# for mineral C:N 1-100, use dredge.model
# manual model selection shows best models are mod3 and mod6, use mod3

# for mineral C:N 1-20, best models are STEP

# select the model that will be used to plot predictions
select.mod <- dredge.model

pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      total.N = mean(min.soil.data$total.N[which(min.soil.data$Side==which.side)]),
                      GWC = mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]),
                      decomposing.carcass="yes")
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ geom_line(color="sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = C.N, group=Distance, color = NULL), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance") + theme_classic()+
  ylim(10,26) +
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      total.N = mean(min.soil.data$total.N[which(min.soil.data$Side==which.side)]),
                      GWC = mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]),
                      decomposing.carcass="yes")
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ geom_line(color="sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = C.N, group=Distance, color = NULL), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
  fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance") + theme_classic()+
  ylim(10,26) +
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "CN_20_minsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "CN_20_minsoil_R.jpg")

# for mineral 1:100 C:N, use limits ylim(10,26)
# for mineral 1:20 C:N, use limits ylim(10,22.5)

# model diagnostics
# see if residuals are normally distributed
plot(density(mod8$residuals))
qqnorm(mod8$residuals)
qqline(mod8$residuals)
shapiro.test(mod8$residuals)
# plot residuals against predicted values to determine heteroscedacitty
plot(fitted(mod8),mod8$residuals)
abline(0,0)

#####################################################################
# d15N - here, include total N but do not include GWC
###################################################################
# select data
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# by lab
min.soil.data <- min.soil.data[ which(min.soil.data$Lab == "WSU"),]

# remove bog sample
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Hansen"),]

# by organic or mineral soil
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Mineral"),]

# select only 1-20 m
min.soil.data <- min.soil.data[ which(min.soil.data$Distance < 21),]

# scale variables
test <- scale(min.soil.data$PercentN, scale=FALSE)
min.soil.data$PercentN <- test[,1]

test1 <- scale(min.soil.data$GWC, scale=FALSE)
min.soil.data$GWC <- test1[,1]

min.soil.data$decomposing.carcass <- as.factor(min.soil.data$decomposing.carcass)
min.soil.data$carcass.deposition <- as.factor(min.soil.data$carcass.deposition)
min.soil.data$carcass.load <- as.numeric(min.soil.data$carcass.load)

# for 1-20, use side and distance up to squared and %N and GWC

# examine effect of distance on d15N ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(d15N ~ 
                #Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                PercentN + 
                ln(Distance) +
                #Side + 
                carcass.deposition + 
                decomposing.carcass + 
                GWC,  
                #Transect,
              data=min.soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(d15N ~ ln(Distance) + PercentN + carcass.load + carcass.deposition + decomposing.carcass + GWC + (1|Stream:Transect),
                     data=min.soil.data)
summary(dredge.model)

dredge.model <- lm(d15N ~ decomposing.carcass + GWC + ln(Distance), data=min.soil.data)
summary(dredge.model)

dredge.model <- lmer(d15N ~ decomposing.carcass + GWC + ln(Distance) + (1|Stream:Transect), data=min.soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(d15N ~ ln(Distance) + PercentN + carcass.load + carcass.deposition + decomposing.carcass + GWC + (1|Stream:Transect),
                                data=min.soil.data))

pred_r_squared(dredge.model)

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# for mineral soil 1-20, top model
dredge.model <- lm(d15N ~ decomposing.carcass + PercentN + ln(Distance), data=min.soil.data)
summary(dredge.model)
# significant variables are decomposing carcass (almost significant), ln(Distance), and Percent N

# for mineral soil 1-100, top model
dredge.model <- lm(d15N ~ decomposing.carcass + ln(Distance) + PercentN, data=min.soil.data)
summary(dredge.model)
# significant variables are decomposing carcass, ln(Distance), and Percent N

# top dredge model for 1-100 mineral soil 
dredge.model <- lm(d15N ~ decomposing.carcass + ln(Distance) + PercentN + carcass.deposition, data=min.soil.data)
summary(dredge.model)

# for 1-100
dredge.model <- lm(d15N ~ decomposing.carcass + ln(Distance) + PercentN, data=min.soil.data)

# for 1-20
dredge.model <- lm(d15N ~ ln(Distance) + PercentN + decomposing.carcass, data=min.soil.data)
summary(dredge.model)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(dredge.model)

# ln distance
mod1 <- lm(d15N ~ Side, data=min.soil.data)
mod2 <- lm(d15N ~ PercentN, data=min.soil.data)
mod3 <- lm(d15N ~ Side + PercentN, data=min.soil.data)
mod4 <- lm(d15N ~ ln(Distance), data=min.soil.data)
mod5 <- lm(d15N ~ ln(Distance) + PercentN, data=min.soil.data)
mod6 <- lm(d15N ~ ln(Distance) + GWC, data=min.soil.data)
mod7 <- lm(d15N ~ ln(Distance) + PercentN + GWC, data=min.soil.data)
mod8 <- lm(d15N ~ ln(Distance) + Side, data=min.soil.data)
mod9 <- lm(d15N ~ ln(Distance) + Side + PercentN, data=min.soil.data)
mod10 <- lm(d15N ~ ln(Distance) + Side + GWC, data=min.soil.data)
mod11 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance), data=min.soil.data)
mod12 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + PercentN, data=min.soil.data)
mod13 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + PercentN + GWC, data=min.soil.data)
mod13a <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=min.soil.data)
mod14 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + PercentN, data=min.soil.data)
mod15 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=min.soil.data)
mod16 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC, data=min.soil.data)
mod17 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=min.soil.data)
mod18 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=min.soil.data)
mod19 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN, data=min.soil.data)
mod20 <- lm(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN + GWC, data=min.soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod21 <- lm(d15N ~ Side:I(ln(Distance)^2) + PercentN + GWC, data=min.soil.data)
mod22 <- lm(d15N ~ Side:I(ln(Distance)^3) + PercentN + GWC, data=min.soil.data)
mod23 <- lm(d15N ~ Side:poly(ln(Distance), 5, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod24 <- lm(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod25 <- lm(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod26 <- lm(d15N ~ Side:poly(ln(Distance), 6, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod27 <- lm(d15N ~ Side:poly(ln(Distance), 5, raw=TRUE), data=min.soil.data)
mod28 <- lm(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE), data=min.soil.data)
mod29 <- lm(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE), data=min.soil.data)
mod30 <- lm(d15N ~ Side:poly(ln(Distance), 6, raw=TRUE), data=min.soil.data)

#adding transect as random effect increased AIC and did not change model
# predictions
mod12.lmer <- lmer(d15N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC + (1|Transect), data=min.soil.data, REML=FALSE)
mod16b.lmer <- lmer(d15N ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC + (1|Transect), data=min.soil.data, REML=FALSE)
mod16c.lmer <- lmer(d15N ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC + (1|Transect), data=min.soil.data, REML=FALSE)

# untransformed distance to compare yielded no hump for 1-20 m

# model comparison
results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11, 
    mod12, mod13, mod13a, mod14, mod15, mod16, mod17, mod18, mod19, mod20, mod21, mod22, 
    #mod23, mod24, mod25, mod26, mod27, mod28, mod29, mod30, 
    step.model, dredge.model)
#results <- AIC(mod2, mod4, mod5, mod6, mod7, step.model, dredge.model)
ordered <- results[order(-results$AIC),]
ordered
summary(mod13a)

# for mineral 1-100, best model is dredge model (same as step)
# for mineral 1-20,  best model is dredge model (same as step)

# select the model that will be used to plot predictions
select.mod <- step.model

# plot raw data with model estimates for LEFT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
           PercentN = mean(min.soil.data$PercentN[which(min.soil.data$Side==which.side)]), 
          GWC=mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]),
          decomposing.carcass="no",
          Transect="S1")
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = d15N, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "d15N", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(3, 10.61) + 
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

# plot raw data with model estimates for RIGHT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
           PercentN = mean(min.soil.data$PercentN[which(min.soil.data$Side==which.side)]), 
           GWC=mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]),
           decomposing.carcass="no",
           Transect="S1")
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = d15N, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "d15N", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(3, 10.61) + 
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "d15N_20_minsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "d15N_20_minsoil_R.jpg")

# for mineral d15N 1-100 limits, use ylim(3, 10.61)
# for mineral d15N 1-20 limits, for step.model use ylim(3.2,15) and for 
# mod4 use ylim(4, 11)

# non parametric regression, but only accepts one variable
library(mblm)
mod1 = mblm(d15N ~ Distance, data=min.soil.data)
mod2 = mblm(d15N ~ GWC, data=min.soil.data)
AIC(mod1, mod2)
summary(mod1)

# calculate variance inflation factor for correlated variables total N and GWC
library(car)
library(rms)
library(DAAG)
rms::vif(mod12)
car::vif(mod12)
DAAG::vif(mod12)
# looks like it's okay to include both total N and GWC, only .4 correlation anyways

#####################################################################
# dC13 - here, include total N but do not include GWC
#####################################################################
# select data
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# remove bog sample
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# by lab
min.soil.data <- min.soil.data[ which(min.soil.data$Lab == "WSU"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Hansen"),]

# by organic or mineral soil
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Mineral"),]

# select only 1-20 m
min.soil.data <- min.soil.data[ which(min.soil.data$Distance < 21),]

# scale variables
test <- scale(min.soil.data$PercentN, scale=FALSE)
min.soil.data$PercentN <- test[,1]

test1 <- scale(min.soil.data$GWC, scale=FALSE)
min.soil.data$GWC <- test1[,1]

min.soil.data$decomposing.carcass <- as.factor(min.soil.data$decomposing.carcass)
min.soil.data$carcass.deposition <- as.factor(min.soil.data$carcass.deposition)
min.soil.data$carcass.load <- as.numeric(min.soil.data$carcass.load)

# examine effect of distance on dC13 ratio
# change na. action
options(na.action = "na.fail")
all.parms<-lm(dC13 ~ 
                #Side + 
                ln(Distance) + 
                #Side:ln(Distance) +
                #Side:I(ln(Distance)^2) + 
                #Side:I(ln(Distance)^3) + 
                #Side:I(ln(Distance)^4) + 
                PercentN +
                decomposing.carcass + 
                GWC +
                carcass.deposition,
              data=min.soil.data)
results<-dredge(all.parms)
subset(results, delta <2)
subset(results, delta == 0)

# all data
dredge.model <- lmer(dC13 ~ ln(Distance) + PercentN + decomposing.carcass + 
                       carcass.deposition + carcass.load + GWC + 
                       (1|Stream:Transect), data=min.soil.data)
summary(dredge.model)

dredge.model <- lm(dC13 ~ ln(Distance) + PercentN, data=min.soil.data)
summary(dredge.model)

dredge.model <- lmer(dC13 ~ ln(Distance) + PercentN + (1|Stream:Transect), data=min.soil.data)
summary(dredge.model)

# get conditional r^2 (which takes into account random effects, second number)
MuMIn::r.squaredGLMM(lme4::lmer(dC13 ~ ln(Distance) + PercentN + decomposing.carcass + 
                                  carcass.deposition + carcass.load + GWC + 
                                  (1|Stream:Transect), data=min.soil.data))

# get percentage variance explained by each variable using anova
af <- anova(dredge.model)
afss <- af$"Sum Sq"
blah <- cbind(af,PctExp=afss/sum(afss)*100)
blah$PctExp

# calculate VIF
library(car)
car::vif(dredge.model)

# for 1-20 mineral soil (with deposition variable)
dredge.model <- lm(dC13 ~ GWC + PercentN, data=min.soil.data)
summary(dredge.model)


# for 1-100 mineral soil (with deposition variable)
dredge.model <- lm(dC13 ~ GWC + PercentN + Side, data=min.soil.data)
summary(dredge.model)

# for both 1-100
dredge.model <- lm(dC13 ~ Side + PercentN, data=min.soil.data)
summary(dredge.model)

# for both 1-20
dredge.model <- lm(dC13 ~ decomposing.carcass + GWC + PercentN, data=min.soil.data)
summary(dredge.model)

step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

pred_r_squared(dredge.model)

# run models
mod1 <- lm(dC13 ~ Side, data=min.soil.data)
mod2 <- lm(dC13 ~ PercentN, data=min.soil.data)
mod3 <- lm(dC13 ~ Side + PercentN, data=min.soil.data)
mod4 <- lm(dC13 ~ ln(Distance), data=min.soil.data)
mod5 <- lm(dC13 ~ ln(Distance) + PercentN, data=min.soil.data)
mod6 <- lm(dC13 ~ ln(Distance) + GWC, data=min.soil.data)
mod7 <- lm(dC13 ~ ln(Distance) + PercentN + GWC, data=min.soil.data)
mod8 <- lm(dC13 ~ ln(Distance) + Side, data=min.soil.data)
mod9 <- lm(dC13 ~ ln(Distance) + Side + PercentN, data=min.soil.data)
mod10 <- lm(dC13 ~ ln(Distance) + Side + GWC, data=min.soil.data)
mod11 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance), data=min.soil.data)
mod12 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + PercentN, data=min.soil.data)
mod13 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + PercentN + GWC, data=min.soil.data)
mod13a <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=min.soil.data)
mod14 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + PercentN, data=min.soil.data)
mod15 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=min.soil.data)
mod16 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + PercentN + GWC, data=min.soil.data)
mod17 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2) + Side:I(ln(Distance)^3), data=min.soil.data)
mod18 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3), data=min.soil.data)
mod19 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN, data=min.soil.data)
mod20 <- lm(dC13 ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^3) + PercentN + GWC, data=min.soil.data)
# although model 15s have as low AIC as model 12, they do not include individual
# variables of side and distance which are important 
mod21 <- lm(dC13 ~ Side:I(ln(Distance)^2) + PercentN + GWC, data=min.soil.data)
mod22 <- lm(dC13 ~ Side:I(ln(Distance)^3) + PercentN + GWC, data=min.soil.data)
mod23 <- lm(dC13 ~ Side:poly(ln(Distance), 5, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod24 <- lm(dC13 ~ Side:poly(ln(Distance), 4, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod25 <- lm(dC13 ~ Side:poly(ln(Distance), 3, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod26 <- lm(dC13 ~ Side:poly(ln(Distance), 6, raw=TRUE) + PercentN + GWC, data=min.soil.data)
mod27 <- lm(dC13 ~ Side:poly(ln(Distance), 5, raw=TRUE), data=min.soil.data)
mod28 <- lm(dC13 ~ Side:poly(ln(Distance), 4, raw=TRUE), data=min.soil.data)
mod29 <- lm(dC13 ~ Side:poly(ln(Distance), 3, raw=TRUE), data=min.soil.data)
mod30 <- lm(dC13 ~ Side:poly(ln(Distance), 6, raw=TRUE), data=min.soil.data)

mod2.lmer <- lmer(dC13 ~ PercentN + (1|Transect), data=min.soil.data, REML=FALSE)
mod9.lmer <- lmer(dC13 ~ ln(Distance) + Side + PercentN + (1|Transect), data=min.soil.data, REML=FALSE)

results <- AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11, 
               mod12, mod13, mod13a, mod14, mod15, mod16, mod17, mod18, mod19, mod20, mod21, mod22,
               mod23, mod24, mod25, mod26, mod27, mod28, mod29, mod30, step.model,
               dredge.model)
AIC(mod2, mod4, mod5, mod6, mod7)
ordered <- results[order(-results$AIC),]
ordered
summary(mod9)

# for 1-100, dredge and step model are best
# for 1-20, step is best

select.mod <- dredge.model

# plot raw data with model estimates for LEFT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SL"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      PercentN = mean(min.soil.data$PercentN[which(min.soil.data$Side==which.side)]), 
                      GWC=mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]))
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p1 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = dC13, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "dC13", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-28,-25.5) + 
  scale_x_reverse(breaks=c(1,3,6,10,20, 40, 60, 100)) 

# plot raw data with model estimates for RIGHT bank
pred_interval <- seq(1,20,0.2)
which.side <- "SR"
example <- data.frame(Distance=pred_interval, Side=which.side, 
                      PercentN = mean(min.soil.data$PercentN[which(min.soil.data$Side==which.side)]), 
                      GWC=mean(min.soil.data$GWC[which(min.soil.data$Side==which.side)]))
min.soil.pred1 <- predict(select.mod, newdata=example, re.form=NA, type="response", interval='confidence')
min.soil.pred1 <- as.data.frame(min.soil.pred1)
min.soil.pred1$Distance <- pred_interval

p2 <- ggplot(data=min.soil.pred1, aes(x=Distance, y=fit))+ 
  geom_line(color = "sienna") +
  geom_ribbon(data=min.soil.pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side==which.side),], aes(x = Distance, 
  y = dC13, group=Distance), alpha = 0.1, color="sienna")+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = 
  "black", fill = NA, linewidth = 1.2)) + 
  labs(y = "dC13", x="Distance") + theme_classic()+theme(legend.position="none")+
  ylim(-28,-25.5) + 
  scale_x_continuous(breaks=c(1,3,6,10,20, 40, 60, 100)) 

grid.arrange(p1, p2, nrow=1)

ggsave(plot=p1, width=5, height=3, dpi=300, filename = "dC13_20_minsoil_L.jpg")
ggsave(plot=p2, width=5, height=3, dpi=300, filename = "dC13_20_minsoil_R.jpg")


# for mineral dC13 1-100 limits, use ylim(-28,-25.5)
# for mineral dC13 1-20 limits, use ylim(-28, -26)

############################################
# GWC
############################################

mod1 <- lm(GWC ~ Side, data=min.soil.data)
mod2 <- lm(GWC ~ ln(Distance), data=min.soil.data)
mod3 <- lm(GWC ~ ln(Distance) + Side, data=min.soil.data)
mod4 <- lm(GWC ~ Side + ln(Distance) + Side:ln(Distance), data=min.soil.data)
mod5 <- lm(GWC ~ Side + ln(Distance) + Side:ln(Distance) + Side:I(ln(Distance)^2), data=min.soil.data)

AIC(mod1, mod2, mod3, mod4, mod5)
summary(mod5)

# plot raw data with model estimates
pred_interval <- seq(1,100,1)
example <- data.frame(Distance=pred_interval, Side="SR")
pred1 <- predict(mod3, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_line() + 
  geom_ribbon(data=pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side=="SR"),], aes(x = Distance, 
                                                                 y = GWC, group=Distance, color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
                                                                       fill = NA, linewidth = 1.2)) + labs(y = "GWC", x="Distance") +
  ylim(0.75, 8.8)# +
scale_x_reverse()

# for mineral GWC 1-100, 1-20, and 1-10,use ylim(0.15, 3.75)
# for organic GWC 1-100, use ylim(0.75, 8.8)

##########################################
# total N #
#########################################

# total N
mod1 <- lm(total.N ~ Side, data=min.soil.data)
mod2 <- lm(total.N ~ Side + GWC, data=min.soil.data)
mod3 <- lm(total.N ~ ln(Distance) + GWC, data=min.soil.data)
mod4 <- lm(total.N ~ ln(Distance), data=min.soil.data)
mod5 <- lm(total.N ~ ln(Distance) + Side, data=min.soil.data)
mod6 <- lm(total.N ~ ln(Distance) + Side + GWC, data=min.soil.data)
mod7 <- lm(total.N ~ GWC, data=min.soil.data)
mod8 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2) + GWC, data=min.soil.data)
mod9 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance), data=min.soil.data)
mod10 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + GWC, data=min.soil.data)
mod11 <- lm(total.N ~ Side + ln(Distance) + Side:ln(Distance) + Side:I((ln(Distance))^2), data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9, mod10, mod11)
# top models are 10 and 8
summary(mod11)

# model diagnostics
# see if residuals are normally distributed
plot(density(mod8$residuals))
qqnorm(mod8$residuals)
qqline(mod8$residuals)
shapiro.test(mod8$residuals)
# plot residuals against predicted values to determine heteroscedacitty
plot(fitted(mod8),mod8$residuals)
abline(0,0)

# plot raw data with model estimates
pred_interval <- seq(1,100,1)
example <- data.frame(Distance=pred_interval, Side="SR")
pred1 <- predict(mod11, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_line() + 
  geom_ribbon(data=pred1, aes(ymin=lwr, ymax=upr), linetype=0, alpha=0.1) + 
  geom_point(data = min.soil.data[which(min.soil.data$Side=="SR"),], aes(x = Distance, 
                                                                 y = total.N, group=Distance, color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
                                                                       fill = NA, linewidth = 1.2)) + labs(y = "total N", x="Distance")# +
ylim(0.75, 8.8)# +
scale_x_reverse()  

####################################################################
# Organic soil C:N, d15N and dC13 by distance at Happy
####################################################################
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# remove bog
min.soil.data <- min.soil.data[ which(min.soil.data$Side != "Bog"),]

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Happy"),]

# by type
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Organic"),]

# C:N
mod1 <- lm(C.N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(C.N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(C.N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(C.N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(C.N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(C.N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 2,3,4
summary(mod1)


# non parametric regression
library(mblm)
mod1 = mblm(C.N ~ Distance, data=min.soil.data)
mod2 = mblm(C.N ~ GWC, data=min.soil.data)
AIC(mod1, mod2)
summary(mod1)

# plot raw data with model estimates
pred_interval <- c(1,3,6,10,20,40,60,100)
example <- data.frame(Distance=pred_interval, GWC=mean(min.soil.data$GWC))
pred1 <- predict(mod1, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_point() + geom_line() + 
  geom_errorbar(aes(x=Distance, ymin=lwr, ymax=upr)) + 
  geom_point(data = min.soil.data, aes(x = Distance, y = C.N, group=Distance, 
                                   color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
                                                                       fill = NA, linewidth = 1.2)) + labs(y = "C.N", x="Distance")

# d15N
mod1 <- lm(d15N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(d15N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(d15N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(d15N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(d15N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(d15N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# all are great - NO EFFECT
summary(mod1)

# dC13
mod1 <- lm(dC13 ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(dC13 ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(dC13 ~ ln(Distance), data=min.soil.data)
mod4 <- lm(dC13 ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(dC13 ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(dC13 ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 4 and 5 best
summary(mod4)

# plot raw data with model estimates
pred_interval <- c(1,3,6,10,20,40,60,100)
example <- data.frame(Distance=pred_interval, GWC=mean(min.soil.data$GWC))
pred1 <- predict(mod4, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$Distance <- pred_interval

ggplot(data=pred1, aes(x=Distance, y=fit))+ geom_point() + geom_line() + 
  geom_errorbar(aes(x=Distance, ymin=lwr, ymax=upr)) + 
  geom_point(data = min.soil.data, aes(x = Distance, y = dC13, group=Distance, 
                                   color = NULL), alpha = 0.1)+
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
                                                                       fill = NA, linewidth = 1.2)) + labs(y = "dC13", x="Distance")

# total N
mod1 <- lm(total.N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(total.N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(total.N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(total.N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(total.N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(total.N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
summary(mod5)

####################################################################
# Organic soil C:N, d15N and dC13 by distance at Yako
####################################################################
min.soil.data <- read.csv("WSU soil isotopes for analysis.csv")

# make distance numeric
min.soil.data$Distance <- as.numeric(min.soil.data$Distance)

# by stream 
min.soil.data <- min.soil.data[ which(min.soil.data$Stream=="Yako"),]

# by type
min.soil.data <- min.soil.data[ which(min.soil.data$Type=="Organic"),]

# C:N
mod1 <- lm(C.N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(C.N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(C.N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(C.N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(C.N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(C.N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 2,6
summary(mod6)

# d15N
mod1 <- lm(d15N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(d15N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(d15N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(d15N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(d15N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(d15N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# top models are 1,3
summary(mod2)

# dC13
mod1 <- lm(dC13 ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(dC13 ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(dC13 ~ ln(Distance), data=min.soil.data)
mod4 <- lm(dC13 ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(dC13 ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(dC13 ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 1 and 2 best
summary(mod6)

# total N
mod1 <- lm(total.N ~ ln(Distance) + GWC, data=min.soil.data)
mod2 <- lm(total.N ~ I(ln(Distance)^2) + GWC, data=min.soil.data)
mod3 <- lm(total.N ~ ln(Distance), data=min.soil.data)
mod4 <- lm(total.N ~ I(ln(Distance)^2), data=min.soil.data)
mod5 <- lm(total.N ~ I(ln(Distance)^3), data=min.soil.data)
mod6 <- lm(total.N ~ GWC, data=min.soil.data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
# mod 1 and 2 best
summary(mod1)

